{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from polars import DataFrame, Series\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Small intro to Polars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+-------+\n",
       "| name  | value |\n",
       "| ---   | ---   |\n",
       "| str   | i64   |\n",
       "+=======+=======+\n",
       "| \"ham\" | 1     |\n",
       "+-------+-------+\n",
       "| \"foo\" | 2     |\n",
       "+-------+-------+\n",
       "| \"bar\" | 3     |\n",
       "+-------+-------+\n",
       "| \"ham\" | 4     |\n",
       "+-------+-------+\n",
       "| \"ham\" | 5     |\n",
       "+-------+-------+\n",
       "| \"foo\" | 6     |\n",
       "+-------+-------+"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = DataFrame({\"name\": [\"ham\", \"foo\", \"bar\", \"ham\", \"ham\", \"foo\"], \n",
    "                \"value\": [1, 2, 3, 4, 5, 6], \n",
    "                })\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can check the head and tail of the DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+-------+\n",
       "| name  | value |\n",
       "| ---   | ---   |\n",
       "| str   | i64   |\n",
       "+=======+=======+\n",
       "| \"ham\" | 1     |\n",
       "+-------+-------+\n",
       "| \"foo\" | 2     |\n",
       "+-------+-------+\n",
       "| \"bar\" | 3     |\n",
       "+-------+-------+"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+-------+\n",
       "| name  | value |\n",
       "| ---   | ---   |\n",
       "| str   | i64   |\n",
       "+=======+=======+\n",
       "| \"ham\" | 4     |\n",
       "+-------+-------+\n",
       "| \"ham\" | 5     |\n",
       "+-------+-------+\n",
       "| \"foo\" | 6     |\n",
       "+-------+-------+"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can select a column and slice the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series: str \n",
       "[\n",
       "\t\"ham\"\n",
       "\t\"foo\"\n",
       "\t\"bar\"\n",
       "]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"name\"][:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can do standard arithmetic with the series."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Series: i64 \n",
      "[\n",
      "\t12\n",
      "\t24\n",
      "\t36\n",
      "\t48\n",
      "\t60\n",
      "\t72\n",
      "]\n",
      "Series: i64 \n",
      "[\n",
      "\t1\n",
      "\t4\n",
      "\t9\n",
      "\t16\n",
      "\t25\n",
      "\t36\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "print(df[\"value\"] * 12)\n",
    "print(df[\"value\"] * df[\"value\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Or do aggregations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.5\n",
      "1\n",
      "6\n"
     ]
    }
   ],
   "source": [
    "print(df[\"value\"].mean())\n",
    "print(df[\"value\"].min())\n",
    "print(df[\"value\"].max())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can create Series that are nullable. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series: f64 \n",
       "[\n",
       "\tnull\n",
       "\t12\n",
       "\t3\n",
       "\tnull\n",
       "\t4\n",
       "\t5\n",
       "]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = Series(\"nullable\", [None, 12., 3., None, 4., 5.], nullable=True)\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Null values are ignored in aggregations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can horizontally stack that Series onto our DataFrame as a new column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+-------+----------+\n",
       "| name  | value | nullable |\n",
       "| ---   | ---   | ---      |\n",
       "| str   | i64   | f64      |\n",
       "+=======+=======+==========+\n",
       "| \"ham\" | 1     | null     |\n",
       "+-------+-------+----------+\n",
       "| \"foo\" | 2     | 12       |\n",
       "+-------+-------+----------+\n",
       "| \"bar\" | 3     | 3        |\n",
       "+-------+-------+----------+\n",
       "| \"ham\" | 4     | null     |\n",
       "+-------+-------+----------+\n",
       "| \"ham\" | 5     | 4        |\n",
       "+-------+-------+----------+\n",
       "| \"foo\" | 6     | 5        |\n",
       "+-------+-------+----------+"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.hstack([s])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Or remove columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+-------+\n",
       "| name  | value |\n",
       "| ---   | ---   |\n",
       "| str   | i64   |\n",
       "+=======+=======+\n",
       "| \"ham\" | 1     |\n",
       "+-------+-------+\n",
       "| \"foo\" | 2     |\n",
       "+-------+-------+\n",
       "| \"bar\" | 3     |\n",
       "+-------+-------+\n",
       "| \"ham\" | 4     |\n",
       "+-------+-------+\n",
       "| \"ham\" | 5     |\n",
       "+-------+-------+\n",
       "| \"foo\" | 6     |\n",
       "+-------+-------+"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_new = df.drop(\"nullable\")\n",
    "df_new"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can do group-by (split-apply-combine) operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+------------+\n",
       "| name  | value_mean |\n",
       "| ---   | ---        |\n",
       "| str   | f64        |\n",
       "+=======+============+\n",
       "| \"foo\" | 4          |\n",
       "+-------+------------+\n",
       "| \"ham\" | 3.333      |\n",
       "+-------+------------+\n",
       "| \"bar\" | 3          |\n",
       "+-------+------------+"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gb = df.groupby(by=\"name\", select=\"value\", agg=\"mean\")\n",
    "gb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And join the result with the original DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "+-------+-------+----------+------------+\n",
       "| name  | value | nullable | value_mean |\n",
       "| ---   | ---   | ---      | ---        |\n",
       "| str   | i64   | f64      | f64        |\n",
       "+=======+=======+==========+============+\n",
       "| \"ham\" | 1     | null     | 3.333      |\n",
       "+-------+-------+----------+------------+\n",
       "| \"foo\" | 2     | 12       | 4          |\n",
       "+-------+-------+----------+------------+\n",
       "| \"bar\" | 3     | 3        | 3          |\n",
       "+-------+-------+----------+------------+\n",
       "| \"ham\" | 4     | null     | 3.333      |\n",
       "+-------+-------+----------+------------+\n",
       "| \"ham\" | 5     | 4        | 3.333      |\n",
       "+-------+-------+----------+------------+\n",
       "| \"foo\" | 6     | 5        | 4          |\n",
       "+-------+-------+----------+------------+"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.join(gb, left_on=\"name\", right_on=\"name\", how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[None, 12.0, 3.0, None, 4.0, 5.0]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"nullable\"].to_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv(\"out.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
